library(tidyverse)
## ── Attaching packages ───────────────────────────────── tidyverse 1.3.0 ──
## ✔ ggplot2 3.2.1     ✔ purrr   0.3.4
## ✔ tibble  3.0.3     ✔ dplyr   1.0.2
## ✔ tidyr   1.1.2     ✔ stringr 1.4.0
## ✔ readr   1.3.1     ✔ forcats 0.5.0
## ── Conflicts ──────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
Confirmed_State_3_13 <-   read_csv(url("https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_daily_reports/03-13-2020.csv")) %>%
  rename(Country_Region = "Country/Region", Province_State = "Province/State") %>% 
  filter (Country_Region == "US") %>% 
  group_by(Province_State, Country_Region) %>% 
  summarise(Confirmed = sum(Confirmed)) 
str(Confirmed_State_3_13)
## tibble [53 × 3] (S3: grouped_df/tbl_df/tbl/data.frame)
##  $ Province_State: chr [1:53] "Alabama" "Alaska" "Arizona" "Arkansas" ...
##  $ Country_Region: chr [1:53] "US" "US" "US" "US" ...
##  $ Confirmed     : num [1:53] 5 1 9 6 282 49 11 4 46 10 ...
##  - attr(*, "groups")= tibble [53 × 2] (S3: tbl_df/tbl/data.frame)
##   ..$ Province_State: chr [1:53] "Alabama" "Alaska" "Arizona" "Arkansas" ...
##   ..$ .rows         : list<int> [1:53] 
##   .. ..$ : int 1
##   .. ..$ : int 2
##   .. ..$ : int 3
##   .. ..$ : int 4
##   .. ..$ : int 5
##   .. ..$ : int 6
##   .. ..$ : int 7
##   .. ..$ : int 8
##   .. ..$ : int 9
##   .. ..$ : int 10
##   .. ..$ : int 11
##   .. ..$ : int 12
##   .. ..$ : int 13
##   .. ..$ : int 14
##   .. ..$ : int 15
##   .. ..$ : int 16
##   .. ..$ : int 17
##   .. ..$ : int 18
##   .. ..$ : int 19
##   .. ..$ : int 20
##   .. ..$ : int 21
##   .. ..$ : int 22
##   .. ..$ : int 23
##   .. ..$ : int 24
##   .. ..$ : int 25
##   .. ..$ : int 26
##   .. ..$ : int 27
##   .. ..$ : int 28
##   .. ..$ : int 29
##   .. ..$ : int 30
##   .. ..$ : int 31
##   .. ..$ : int 32
##   .. ..$ : int 33
##   .. ..$ : int 34
##   .. ..$ : int 35
##   .. ..$ : int 36
##   .. ..$ : int 37
##   .. ..$ : int 38
##   .. ..$ : int 39
##   .. ..$ : int 40
##   .. ..$ : int 41
##   .. ..$ : int 42
##   .. ..$ : int 43
##   .. ..$ : int 44
##   .. ..$ : int 45
##   .. ..$ : int 46
##   .. ..$ : int 47
##   .. ..$ : int 48
##   .. ..$ : int 49
##   .. ..$ : int 50
##   .. ..$ : int 51
##   .. ..$ : int 52
##   .. ..$ : int 53
##   .. ..@ ptype: int(0) 
##   ..- attr(*, ".drop")= logi TRUE
Confirmed_State_9_13 <-   read_csv(url("https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_daily_reports/09-13-2020.csv")) %>% 
  filter (Country_Region == "US") %>% 
  group_by(Province_State, Country_Region) %>% 
  summarise(Confirmed = sum(Confirmed)) 
str(Confirmed_State_9_13)
## tibble [58 × 3] (S3: grouped_df/tbl_df/tbl/data.frame)
##  $ Province_State: chr [1:58] "Alabama" "Alaska" "Arizona" "Arkansas" ...
##  $ Country_Region: chr [1:58] "US" "US" "US" "US" ...
##  $ Confirmed     : num [1:58] 138755 6268 208512 70219 761728 ...
##  - attr(*, "groups")= tibble [58 × 2] (S3: tbl_df/tbl/data.frame)
##   ..$ Province_State: chr [1:58] "Alabama" "Alaska" "Arizona" "Arkansas" ...
##   ..$ .rows         : list<int> [1:58] 
##   .. ..$ : int 1
##   .. ..$ : int 2
##   .. ..$ : int 3
##   .. ..$ : int 4
##   .. ..$ : int 5
##   .. ..$ : int 6
##   .. ..$ : int 7
##   .. ..$ : int 8
##   .. ..$ : int 9
##   .. ..$ : int 10
##   .. ..$ : int 11
##   .. ..$ : int 12
##   .. ..$ : int 13
##   .. ..$ : int 14
##   .. ..$ : int 15
##   .. ..$ : int 16
##   .. ..$ : int 17
##   .. ..$ : int 18
##   .. ..$ : int 19
##   .. ..$ : int 20
##   .. ..$ : int 21
##   .. ..$ : int 22
##   .. ..$ : int 23
##   .. ..$ : int 24
##   .. ..$ : int 25
##   .. ..$ : int 26
##   .. ..$ : int 27
##   .. ..$ : int 28
##   .. ..$ : int 29
##   .. ..$ : int 30
##   .. ..$ : int 31
##   .. ..$ : int 32
##   .. ..$ : int 33
##   .. ..$ : int 34
##   .. ..$ : int 35
##   .. ..$ : int 36
##   .. ..$ : int 37
##   .. ..$ : int 38
##   .. ..$ : int 39
##   .. ..$ : int 40
##   .. ..$ : int 41
##   .. ..$ : int 42
##   .. ..$ : int 43
##   .. ..$ : int 44
##   .. ..$ : int 45
##   .. ..$ : int 46
##   .. ..$ : int 47
##   .. ..$ : int 48
##   .. ..$ : int 49
##   .. ..$ : int 50
##   .. ..$ : int 51
##   .. ..$ : int 52
##   .. ..$ : int 53
##   .. ..$ : int 54
##   .. ..$ : int 55
##   .. ..$ : int 56
##   .. ..$ : int 57
##   .. ..$ : int 58
##   .. ..@ ptype: int(0) 
##   ..- attr(*, ".drop")= logi TRUE
setdiff(Confirmed_State_9_13$Province_State, Confirmed_State_3_13$Province_State)
## [1] "Guam"                     "Northern Mariana Islands"
## [3] "Puerto Rico"              "Recovered"               
## [5] "Virgin Islands"
Confirmed_State_9_13 <- Confirmed_State_9_13 %>% 
  filter(Province_State != "Recovered") 
Confirmed_State_3_13_9_13_joined <- full_join(Confirmed_State_3_13,
      Confirmed_State_9_13, by = c("Province_State"))
head(Confirmed_State_3_13_9_13_joined)
## # A tibble: 6 x 5
## # Groups:   Province_State [6]
##   Province_State Country_Region.x Confirmed.x Country_Region.y Confirmed.y
##   <chr>          <chr>                  <dbl> <chr>                  <dbl>
## 1 Alabama        US                         5 US                    138755
## 2 Alaska         US                         1 US                      6268
## 3 Arizona        US                         9 US                    208512
## 4 Arkansas       US                         6 US                     70219
## 5 California     US                       282 US                    761728
## 6 Colorado       US                        49 US                     61293
tail(Confirmed_State_3_13_9_13_joined, 5)
## # A tibble: 5 x 5
## # Groups:   Province_State [5]
##   Province_State  Country_Region.x Confirmed.x Country_Region.y Confirmed.y
##   <chr>           <chr>                  <dbl> <chr>                  <dbl>
## 1 Wyoming         US                         1 US                      4346
## 2 Guam            <NA>                      NA US                      1863
## 3 Northern Maria… <NA>                      NA US                        60
## 4 Puerto Rico     <NA>                      NA US                     37380
## 5 Virgin Islands  <NA>                      NA US                      1220
which(is.na(Confirmed_State_3_13_9_13_joined))
## [1] 111 112 113 114 168 169 170 171
Confirmed_State_3_13_9_13_joined <- full_join(Confirmed_State_3_13,
      Confirmed_State_9_13, by = c("Province_State")) %>% 
      rename(Confirmed_3_13_2020 = "Confirmed.x", Confirmed_9_13_2020 = "Confirmed.y") %>% 
      select(-Country_Region.x, -Country_Region.y) %>% 
      replace_na(list(Confirmed_3_13_2020 = 0))
head(Confirmed_State_3_13_9_13_joined)
## # A tibble: 6 x 3
## # Groups:   Province_State [6]
##   Province_State Confirmed_3_13_2020 Confirmed_9_13_2020
##   <chr>                        <dbl>               <dbl>
## 1 Alabama                          5              138755
## 2 Alaska                           1                6268
## 3 Arizona                          9              208512
## 4 Arkansas                         6               70219
## 5 California                     282              761728
## 6 Colorado                        49               61293
which(is.na(Confirmed_State_3_13_9_13_joined))
## integer(0)

Switching between wide and long table formats.

.

Confirmed_State_3_13_9_13_joined_long <- Confirmed_State_3_13_9_13_joined %>% 
              pivot_longer(-c(Province_State),
                            names_to = "Date", values_to = "Confirmed")
# to adjust figure size {r, fig.width = 5, fig.height = 10}
ggplot(Confirmed_State_3_13_9_13_joined_long, aes(x = Confirmed,  y = Province_State))  + 
    geom_point(aes(color = Date)) 

Working with the time series data

Data Wrangling

time_series_confirmed <- read_csv(url("https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv")) %>%
  rename(Province_State = "Province/State", Country_Region = "Country/Region")
 download.file(url="https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv", 
               destfile = "data/time_series_covid19_confirmed_global.csv")
time_series_confirmed <- read_csv("data/time_series_covid19_confirmed_global.csv")%>%
  rename(Province_State = "Province/State", Country_Region = "Country/Region")
## Parsed with column specification:
## cols(
##   .default = col_double(),
##   `Province/State` = col_character(),
##   `Country/Region` = col_character()
## )
## See spec(...) for full column specifications.
head(time_series_confirmed)
## # A tibble: 6 x 249
##   Province_State Country_Region   Lat   Long `1/22/20` `1/23/20` `1/24/20`
##   <chr>          <chr>          <dbl>  <dbl>     <dbl>     <dbl>     <dbl>
## 1 <NA>           Afghanistan     33.9  67.7          0         0         0
## 2 <NA>           Albania         41.2  20.2          0         0         0
## 3 <NA>           Algeria         28.0   1.66         0         0         0
## 4 <NA>           Andorra         42.5   1.52         0         0         0
## 5 <NA>           Angola         -11.2  17.9          0         0         0
## 6 <NA>           Antigua and B…  17.1 -61.8          0         0         0
## # … with 242 more variables: `1/25/20` <dbl>, `1/26/20` <dbl>,
## #   `1/27/20` <dbl>, `1/28/20` <dbl>, `1/29/20` <dbl>, `1/30/20` <dbl>,
## #   `1/31/20` <dbl>, `2/1/20` <dbl>, `2/2/20` <dbl>, `2/3/20` <dbl>,
## #   `2/4/20` <dbl>, `2/5/20` <dbl>, `2/6/20` <dbl>, `2/7/20` <dbl>,
## #   `2/8/20` <dbl>, `2/9/20` <dbl>, `2/10/20` <dbl>, `2/11/20` <dbl>,
## #   `2/12/20` <dbl>, `2/13/20` <dbl>, `2/14/20` <dbl>, `2/15/20` <dbl>,
## #   `2/16/20` <dbl>, `2/17/20` <dbl>, `2/18/20` <dbl>, `2/19/20` <dbl>,
## #   `2/20/20` <dbl>, `2/21/20` <dbl>, `2/22/20` <dbl>, `2/23/20` <dbl>,
## #   `2/24/20` <dbl>, `2/25/20` <dbl>, `2/26/20` <dbl>, `2/27/20` <dbl>,
## #   `2/28/20` <dbl>, `2/29/20` <dbl>, `3/1/20` <dbl>, `3/2/20` <dbl>,
## #   `3/3/20` <dbl>, `3/4/20` <dbl>, `3/5/20` <dbl>, `3/6/20` <dbl>,
## #   `3/7/20` <dbl>, `3/8/20` <dbl>, `3/9/20` <dbl>, `3/10/20` <dbl>,
## #   `3/11/20` <dbl>, `3/12/20` <dbl>, `3/13/20` <dbl>, `3/14/20` <dbl>,
## #   `3/15/20` <dbl>, `3/16/20` <dbl>, `3/17/20` <dbl>, `3/18/20` <dbl>,
## #   `3/19/20` <dbl>, `3/20/20` <dbl>, `3/21/20` <dbl>, `3/22/20` <dbl>,
## #   `3/23/20` <dbl>, `3/24/20` <dbl>, `3/25/20` <dbl>, `3/26/20` <dbl>,
## #   `3/27/20` <dbl>, `3/28/20` <dbl>, `3/29/20` <dbl>, `3/30/20` <dbl>,
## #   `3/31/20` <dbl>, `4/1/20` <dbl>, `4/2/20` <dbl>, `4/3/20` <dbl>,
## #   `4/4/20` <dbl>, `4/5/20` <dbl>, `4/6/20` <dbl>, `4/7/20` <dbl>,
## #   `4/8/20` <dbl>, `4/9/20` <dbl>, `4/10/20` <dbl>, `4/11/20` <dbl>,
## #   `4/12/20` <dbl>, `4/13/20` <dbl>, `4/14/20` <dbl>, `4/15/20` <dbl>,
## #   `4/16/20` <dbl>, `4/17/20` <dbl>, `4/18/20` <dbl>, `4/19/20` <dbl>,
## #   `4/20/20` <dbl>, `4/21/20` <dbl>, `4/22/20` <dbl>, `4/23/20` <dbl>,
## #   `4/24/20` <dbl>, `4/25/20` <dbl>, `4/26/20` <dbl>, `4/27/20` <dbl>,
## #   `4/28/20` <dbl>, `4/29/20` <dbl>, `4/30/20` <dbl>, `5/1/20` <dbl>,
## #   `5/2/20` <dbl>, `5/3/20` <dbl>, …
time_series_confirmed_long <- time_series_confirmed %>% 
               pivot_longer(-c(Province_State, Country_Region, Lat, Long),
                            names_to = "Date", values_to = "Confirmed") 
head(time_series_confirmed_long)
## # A tibble: 6 x 6
##   Province_State Country_Region   Lat  Long Date    Confirmed
##   <chr>          <chr>          <dbl> <dbl> <chr>       <dbl>
## 1 <NA>           Afghanistan     33.9  67.7 1/22/20         0
## 2 <NA>           Afghanistan     33.9  67.7 1/23/20         0
## 3 <NA>           Afghanistan     33.9  67.7 1/24/20         0
## 4 <NA>           Afghanistan     33.9  67.7 1/25/20         0
## 5 <NA>           Afghanistan     33.9  67.7 1/26/20         0
## 6 <NA>           Afghanistan     33.9  67.7 1/27/20         0
 download.file(url="https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_deaths_global.csv", 
               destfile = "data/time_series_covid19_deaths_global.csv")
time_series_deaths <- read_csv("data/time_series_covid19_deaths_global.csv") %>%
 rename(Province_State = "Province/State", Country_Region = "Country/Region")
## Parsed with column specification:
## cols(
##   .default = col_double(),
##   `Province/State` = col_character(),
##   `Country/Region` = col_character()
## )
## See spec(...) for full column specifications.
time_series_deaths_long <- time_series_deaths %>% 
               pivot_longer(-c(Province_State, Country_Region, Lat, Long),
                            names_to = "Date", values_to = "Deaths") 
head(time_series_deaths_long)
## # A tibble: 6 x 6
##   Province_State Country_Region   Lat  Long Date    Deaths
##   <chr>          <chr>          <dbl> <dbl> <chr>    <dbl>
## 1 <NA>           Afghanistan     33.9  67.7 1/22/20      0
## 2 <NA>           Afghanistan     33.9  67.7 1/23/20      0
## 3 <NA>           Afghanistan     33.9  67.7 1/24/20      0
## 4 <NA>           Afghanistan     33.9  67.7 1/25/20      0
## 5 <NA>           Afghanistan     33.9  67.7 1/26/20      0
## 6 <NA>           Afghanistan     33.9  67.7 1/27/20      0

Joining the time series tables

time_series_confirmed_long <- time_series_confirmed_long %>% 
  unite(Key, Province_State, Country_Region, Date, sep = ".", remove = FALSE)
head(time_series_confirmed_long)
## # A tibble: 6 x 7
##   Key            Province_State Country_Region   Lat  Long Date   Confirmed
##   <chr>          <chr>          <chr>          <dbl> <dbl> <chr>      <dbl>
## 1 NA.Afghanista… <NA>           Afghanistan     33.9  67.7 1/22/…         0
## 2 NA.Afghanista… <NA>           Afghanistan     33.9  67.7 1/23/…         0
## 3 NA.Afghanista… <NA>           Afghanistan     33.9  67.7 1/24/…         0
## 4 NA.Afghanista… <NA>           Afghanistan     33.9  67.7 1/25/…         0
## 5 NA.Afghanista… <NA>           Afghanistan     33.9  67.7 1/26/…         0
## 6 NA.Afghanista… <NA>           Afghanistan     33.9  67.7 1/27/…         0
time_series_deaths_long <- time_series_deaths_long %>% 
  unite(Key, Province_State, Country_Region, Date, sep = ".") %>% 
  select(Key, Deaths)
time_series_long_joined <- full_join(time_series_confirmed_long,
              time_series_deaths_long, by = c("Key")) %>% 
              select(-Key)
head(time_series_long_joined)
## # A tibble: 6 x 7
##   Province_State Country_Region   Lat  Long Date    Confirmed Deaths
##   <chr>          <chr>          <dbl> <dbl> <chr>       <dbl>  <dbl>
## 1 <NA>           Afghanistan     33.9  67.7 1/22/20         0      0
## 2 <NA>           Afghanistan     33.9  67.7 1/23/20         0      0
## 3 <NA>           Afghanistan     33.9  67.7 1/24/20         0      0
## 4 <NA>           Afghanistan     33.9  67.7 1/25/20         0      0
## 5 <NA>           Afghanistan     33.9  67.7 1/26/20         0      0
## 6 <NA>           Afghanistan     33.9  67.7 1/27/20         0      0
which(is.na(time_series_long_joined$Confirmed))
##    [1] 65171 65172 65173 65174 65175 65176 65177 65178 65179 65180 65181
##   [12] 65182 65183 65184 65185 65186 65187 65188 65189 65190 65191 65192
##   [23] 65193 65194 65195 65196 65197 65198 65199 65200 65201 65202 65203
##   [34] 65204 65205 65206 65207 65208 65209 65210 65211 65212 65213 65214
##   [45] 65215 65216 65217 65218 65219 65220 65221 65222 65223 65224 65225
##   [56] 65226 65227 65228 65229 65230 65231 65232 65233 65234 65235 65236
##   [67] 65237 65238 65239 65240 65241 65242 65243 65244 65245 65246 65247
##   [78] 65248 65249 65250 65251 65252 65253 65254 65255 65256 65257 65258
##   [89] 65259 65260 65261 65262 65263 65264 65265 65266 65267 65268 65269
##  [100] 65270 65271 65272 65273 65274 65275 65276 65277 65278 65279 65280
##  [111] 65281 65282 65283 65284 65285 65286 65287 65288 65289 65290 65291
##  [122] 65292 65293 65294 65295 65296 65297 65298 65299 65300 65301 65302
##  [133] 65303 65304 65305 65306 65307 65308 65309 65310 65311 65312 65313
##  [144] 65314 65315 65316 65317 65318 65319 65320 65321 65322 65323 65324
##  [155] 65325 65326 65327 65328 65329 65330 65331 65332 65333 65334 65335
##  [166] 65336 65337 65338 65339 65340 65341 65342 65343 65344 65345 65346
##  [177] 65347 65348 65349 65350 65351 65352 65353 65354 65355 65356 65357
##  [188] 65358 65359 65360 65361 65362 65363 65364 65365 65366 65367 65368
##  [199] 65369 65370 65371 65372 65373 65374 65375 65376 65377 65378 65379
##  [210] 65380 65381 65382 65383 65384 65385 65386 65387 65388 65389 65390
##  [221] 65391 65392 65393 65394 65395 65396 65397 65398 65399 65400 65401
##  [232] 65402 65403 65404 65405 65406 65407 65408 65409 65410 65411 65412
##  [243] 65413 65414 65415 65416 65417 65418 65419 65420 65421 65422 65423
##  [254] 65424 65425 65426 65427 65428 65429 65430 65431 65432 65433 65434
##  [265] 65435 65436 65437 65438 65439 65440 65441 65442 65443 65444 65445
##  [276] 65446 65447 65448 65449 65450 65451 65452 65453 65454 65455 65456
##  [287] 65457 65458 65459 65460 65461 65462 65463 65464 65465 65466 65467
##  [298] 65468 65469 65470 65471 65472 65473 65474 65475 65476 65477 65478
##  [309] 65479 65480 65481 65482 65483 65484 65485 65486 65487 65488 65489
##  [320] 65490 65491 65492 65493 65494 65495 65496 65497 65498 65499 65500
##  [331] 65501 65502 65503 65504 65505 65506 65507 65508 65509 65510 65511
##  [342] 65512 65513 65514 65515 65516 65517 65518 65519 65520 65521 65522
##  [353] 65523 65524 65525 65526 65527 65528 65529 65530 65531 65532 65533
##  [364] 65534 65535 65536 65537 65538 65539 65540 65541 65542 65543 65544
##  [375] 65545 65546 65547 65548 65549 65550 65551 65552 65553 65554 65555
##  [386] 65556 65557 65558 65559 65560 65561 65562 65563 65564 65565 65566
##  [397] 65567 65568 65569 65570 65571 65572 65573 65574 65575 65576 65577
##  [408] 65578 65579 65580 65581 65582 65583 65584 65585 65586 65587 65588
##  [419] 65589 65590 65591 65592 65593 65594 65595 65596 65597 65598 65599
##  [430] 65600 65601 65602 65603 65604 65605 65606 65607 65608 65609 65610
##  [441] 65611 65612 65613 65614 65615 65616 65617 65618 65619 65620 65621
##  [452] 65622 65623 65624 65625 65626 65627 65628 65629 65630 65631 65632
##  [463] 65633 65634 65635 65636 65637 65638 65639 65640 65641 65642 65643
##  [474] 65644 65645 65646 65647 65648 65649 65650 65651 65652 65653 65654
##  [485] 65655 65656 65657 65658 65659 65660 65661 65662 65663 65664 65665
##  [496] 65666 65667 65668 65669 65670 65671 65672 65673 65674 65675 65676
##  [507] 65677 65678 65679 65680 65681 65682 65683 65684 65685 65686 65687
##  [518] 65688 65689 65690 65691 65692 65693 65694 65695 65696 65697 65698
##  [529] 65699 65700 65701 65702 65703 65704 65705 65706 65707 65708 65709
##  [540] 65710 65711 65712 65713 65714 65715 65716 65717 65718 65719 65720
##  [551] 65721 65722 65723 65724 65725 65726 65727 65728 65729 65730 65731
##  [562] 65732 65733 65734 65735 65736 65737 65738 65739 65740 65741 65742
##  [573] 65743 65744 65745 65746 65747 65748 65749 65750 65751 65752 65753
##  [584] 65754 65755 65756 65757 65758 65759 65760 65761 65762 65763 65764
##  [595] 65765 65766 65767 65768 65769 65770 65771 65772 65773 65774 65775
##  [606] 65776 65777 65778 65779 65780 65781 65782 65783 65784 65785 65786
##  [617] 65787 65788 65789 65790 65791 65792 65793 65794 65795 65796 65797
##  [628] 65798 65799 65800 65801 65802 65803 65804 65805 65806 65807 65808
##  [639] 65809 65810 65811 65812 65813 65814 65815 65816 65817 65818 65819
##  [650] 65820 65821 65822 65823 65824 65825 65826 65827 65828 65829 65830
##  [661] 65831 65832 65833 65834 65835 65836 65837 65838 65839 65840 65841
##  [672] 65842 65843 65844 65845 65846 65847 65848 65849 65850 65851 65852
##  [683] 65853 65854 65855 65856 65857 65858 65859 65860 65861 65862 65863
##  [694] 65864 65865 65866 65867 65868 65869 65870 65871 65872 65873 65874
##  [705] 65875 65876 65877 65878 65879 65880 65881 65882 65883 65884 65885
##  [716] 65886 65887 65888 65889 65890 65891 65892 65893 65894 65895 65896
##  [727] 65897 65898 65899 65900 65901 65902 65903 65904 65905 65906 65907
##  [738] 65908 65909 65910 65911 65912 65913 65914 65915 65916 65917 65918
##  [749] 65919 65920 65921 65922 65923 65924 65925 65926 65927 65928 65929
##  [760] 65930 65931 65932 65933 65934 65935 65936 65937 65938 65939 65940
##  [771] 65941 65942 65943 65944 65945 65946 65947 65948 65949 65950 65951
##  [782] 65952 65953 65954 65955 65956 65957 65958 65959 65960 65961 65962
##  [793] 65963 65964 65965 65966 65967 65968 65969 65970 65971 65972 65973
##  [804] 65974 65975 65976 65977 65978 65979 65980 65981 65982 65983 65984
##  [815] 65985 65986 65987 65988 65989 65990 65991 65992 65993 65994 65995
##  [826] 65996 65997 65998 65999 66000 66001 66002 66003 66004 66005 66006
##  [837] 66007 66008 66009 66010 66011 66012 66013 66014 66015 66016 66017
##  [848] 66018 66019 66020 66021 66022 66023 66024 66025 66026 66027 66028
##  [859] 66029 66030 66031 66032 66033 66034 66035 66036 66037 66038 66039
##  [870] 66040 66041 66042 66043 66044 66045 66046 66047 66048 66049 66050
##  [881] 66051 66052 66053 66054 66055 66056 66057 66058 66059 66060 66061
##  [892] 66062 66063 66064 66065 66066 66067 66068 66069 66070 66071 66072
##  [903] 66073 66074 66075 66076 66077 66078 66079 66080 66081 66082 66083
##  [914] 66084 66085 66086 66087 66088 66089 66090 66091 66092 66093 66094
##  [925] 66095 66096 66097 66098 66099 66100 66101 66102 66103 66104 66105
##  [936] 66106 66107 66108 66109 66110 66111 66112 66113 66114 66115 66116
##  [947] 66117 66118 66119 66120 66121 66122 66123 66124 66125 66126 66127
##  [958] 66128 66129 66130 66131 66132 66133 66134 66135 66136 66137 66138
##  [969] 66139 66140 66141 66142 66143 66144 66145 66146 66147 66148 66149
##  [980] 66150 66151 66152 66153 66154 66155 66156 66157 66158 66159 66160
##  [991] 66161 66162 66163 66164 66165 66166 66167 66168 66169 66170 66171
## [1002] 66172 66173 66174 66175 66176 66177 66178 66179 66180 66181 66182
## [1013] 66183 66184 66185 66186 66187 66188 66189 66190 66191 66192 66193
## [1024] 66194 66195 66196 66197 66198 66199 66200 66201 66202 66203 66204
## [1035] 66205 66206 66207 66208 66209 66210 66211 66212 66213 66214 66215
## [1046] 66216 66217 66218 66219 66220 66221 66222 66223 66224 66225 66226
## [1057] 66227 66228 66229 66230 66231 66232 66233 66234
which(is.na(time_series_long_joined$Deaths))
## integer(0)
library(lubridate)
## 
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union
time_series_long_joined$Date <- mdy(time_series_long_joined$Date)
time_series_long_joined_counts <- time_series_long_joined %>% 
  pivot_longer(-c(Province_State, Country_Region, Lat, Long, Date),
               names_to = "Report_Type", values_to = "Counts")
head(time_series_long_joined_counts)
## # A tibble: 6 x 7
##   Province_State Country_Region   Lat  Long Date       Report_Type Counts
##   <chr>          <chr>          <dbl> <dbl> <date>     <chr>        <dbl>
## 1 <NA>           Afghanistan     33.9  67.7 2020-01-22 Confirmed        0
## 2 <NA>           Afghanistan     33.9  67.7 2020-01-22 Deaths           0
## 3 <NA>           Afghanistan     33.9  67.7 2020-01-23 Confirmed        0
## 4 <NA>           Afghanistan     33.9  67.7 2020-01-23 Deaths           0
## 5 <NA>           Afghanistan     33.9  67.7 2020-01-24 Confirmed        0
## 6 <NA>           Afghanistan     33.9  67.7 2020-01-24 Deaths           0

Making Graphs from the time series data

time_series_long_joined %>% 
  group_by(Country_Region,Date) %>% 
  summarise_at(c("Confirmed", "Deaths"), sum) %>% 
  filter (Country_Region == "US") %>% 
    ggplot(aes(x = Date,  y = Deaths)) + 
    geom_point() +
    geom_line() +
    ggtitle("US COVID-19 Deaths")

time_series_long_joined %>% 
  group_by(Country_Region,Date) %>% 
  summarise_at(c("Confirmed", "Deaths"), sum) %>% 
  filter (Country_Region %in% c("China","Japan", "Korea, South",
                                "Italy","Spain", "US")) %>% 
    ggplot(aes(x = Date,  y = Deaths)) + 
    geom_point() +
    geom_line() +
    ggtitle("COVID-19 Deaths") +
    facet_wrap(~Country_Region, ncol=2, scales="free_y")

time_series_long_joined %>% 
    group_by(Country_Region,Date) %>% 
    summarise_at(c("Confirmed", "Deaths"), sum) %>% 
    filter (Country_Region %in% c("China","France","Italy", 
                                "Korea, South", "US")) %>% 
    ggplot(aes(x = Date,  y = Deaths, color = Country_Region)) + 
    geom_point() +
    geom_line() +
    ggtitle("COVID-19 Deaths")

time_series_long_joined_counts %>% 
  group_by(Country_Region, Report_Type, Date) %>% 
  summarise(Counts = sum(Counts)) %>% 
  filter (Country_Region == "US") %>% 
    ggplot(aes(x = Date,  y = log2(Counts), fill = Report_Type, color = Report_Type)) + 
    geom_point() +
    geom_line() +
    ggtitle("US COVID-19 Cases")
## `summarise()` regrouping output by 'Country_Region', 'Report_Type' (override with `.groups` argument)

Exercise 1

See Lab5(part2datavisual).Rmd

Exercise 2

Confirmed_State_6_13 <-   read_csv(url("https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_daily_reports/06-13-2020.csv")) %>%
  filter (Country_Region == "US") %>% 
  group_by(Province_State, Country_Region) %>% 
  summarise(Confirmed = sum(Confirmed)) 
## Parsed with column specification:
## cols(
##   FIPS = col_double(),
##   Admin2 = col_character(),
##   Province_State = col_character(),
##   Country_Region = col_character(),
##   Last_Update = col_datetime(format = ""),
##   Lat = col_double(),
##   Long_ = col_double(),
##   Confirmed = col_double(),
##   Deaths = col_double(),
##   Recovered = col_double(),
##   Active = col_double(),
##   Combined_Key = col_character(),
##   Incidence_Rate = col_double(),
##   `Case-Fatality_Ratio` = col_double()
## )
## `summarise()` regrouping output by 'Province_State' (override with `.groups` argument)
str(Confirmed_State_6_13)
Confirmed_State_9_13 <- read_csv(url("https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_daily_reports/09-13-2020.csv")) %>% filter(Country_Region == "US") %>%  group_by(Province_State, Country_Region) %>% summarise(Confirmed = sum(Confirmed))
## Parsed with column specification:
## cols(
##   FIPS = col_double(),
##   Admin2 = col_character(),
##   Province_State = col_character(),
##   Country_Region = col_character(),
##   Last_Update = col_datetime(format = ""),
##   Lat = col_double(),
##   Long_ = col_double(),
##   Confirmed = col_double(),
##   Deaths = col_double(),
##   Recovered = col_double(),
##   Active = col_double(),
##   Combined_Key = col_character(),
##   Incidence_Rate = col_double(),
##   `Case-Fatality_Ratio` = col_double()
## )
## `summarise()` regrouping output by 'Province_State' (override with `.groups` argument)
str(Confirmed_State_9_13)
setdiff(Confirmed_State_6_13$Province_State, Confirmed_State_9_13$Province_State)
## character(0)
Confirmed_State_6_13_9_13_joined <- full_join(Confirmed_State_6_13,
      Confirmed_State_9_13, by = c("Province_State"))
head(Confirmed_State_6_13_9_13_joined)
## # A tibble: 6 x 5
## # Groups:   Province_State [6]
##   Province_State Country_Region.x Confirmed.x Country_Region.y Confirmed.y
##   <chr>          <chr>                  <dbl> <chr>                  <dbl>
## 1 Alabama        US                     24601 US                    138755
## 2 Alaska         US                       653 US                      6268
## 3 Arizona        US                     34660 US                    208512
## 4 Arkansas       US                     12095 US                     70219
## 5 California     US                    150018 US                    761728
## 6 Colorado       US                     29002 US                     61293
tail(Confirmed_State_6_13_9_13_joined, 5)
## # A tibble: 5 x 5
## # Groups:   Province_State [5]
##   Province_State Country_Region.x Confirmed.x Country_Region.y Confirmed.y
##   <chr>          <chr>                  <dbl> <chr>                  <dbl>
## 1 Virginia       US                     53869 US                    133742
## 2 Washington     US                     25538 US                     79826
## 3 West Virginia  US                      2274 US                     12705
## 4 Wisconsin      US                     22518 US                     89185
## 5 Wyoming        US                      1050 US                      4346
which(is.na(Confirmed_State_6_13_9_13_joined))
## integer(0)
Confirmed_State_6_13_9_13_joined <- full_join(Confirmed_State_6_13,
      Confirmed_State_9_13, by = c("Province_State")) %>% 
      rename(Confirmed_6_13_2020 = "Confirmed.x", Confirmed_9_13_2020 = "Confirmed.y") %>% 
      select(-Country_Region.x, -Country_Region.y) %>% 
      replace_na(list(Confirmed_6_13_2020 = 0))
head(Confirmed_State_6_13_9_13_joined)
## # A tibble: 6 x 3
## # Groups:   Province_State [6]
##   Province_State Confirmed_6_13_2020 Confirmed_9_13_2020
##   <chr>                        <dbl>               <dbl>
## 1 Alabama                      24601              138755
## 2 Alaska                         653                6268
## 3 Arizona                      34660              208512
## 4 Arkansas                     12095               70219
## 5 California                  150018              761728
## 6 Colorado                     29002               61293
which(is.na(Confirmed_State_6_13_9_13_joined))
## integer(0)
Confirmed_State_6_13_9_13_joined_long <- Confirmed_State_6_13_9_13_joined %>% 
              pivot_longer(-c(Province_State),
                            names_to = "Date", values_to = "Confirmed")
Confirmed_State_6_13_9_13_joined_long 
## # A tibble: 116 x 3
## # Groups:   Province_State [58]
##    Province_State Date                Confirmed
##    <chr>          <chr>                   <dbl>
##  1 Alabama        Confirmed_6_13_2020     24601
##  2 Alabama        Confirmed_9_13_2020    138755
##  3 Alaska         Confirmed_6_13_2020       653
##  4 Alaska         Confirmed_9_13_2020      6268
##  5 Arizona        Confirmed_6_13_2020     34660
##  6 Arizona        Confirmed_9_13_2020    208512
##  7 Arkansas       Confirmed_6_13_2020     12095
##  8 Arkansas       Confirmed_9_13_2020     70219
##  9 California     Confirmed_6_13_2020    150018
## 10 California     Confirmed_9_13_2020    761728
## # … with 106 more rows

##Exercise 2

ggplot(Confirmed_State_6_13_9_13_joined_long, aes(x = Province_State,  y = Confirmed, fill = Date)) +
    geom_bar(stat = "identity") 

##Exercise 3

ggplot(Confirmed_State_6_13_9_13_joined_long, aes(x = Province_State,  y = Confirmed, fill = Date)) +
    geom_bar(stat = "identity") + labs(title = "US Confimed COVID-19 Cases from 6/13/20 and 9/13/20",
         x = "State/Province in US",
         y = "Number of Confirmed Cases")

Exercise 4

time_series_long_joined_counts %>% group_by(Country_Region) %>% 
 ggplot(aes(x = Date, y = Counts)) + geom_point() + labs(x = "Date", y = "Deaths", title ="COVID-19 Deaths Worldwide")
## Warning: Removed 2128 rows containing missing values (geom_point).

##Exercise 5

head(time_series_long_joined)
## # A tibble: 6 x 7
##   Province_State Country_Region   Lat  Long Date       Confirmed Deaths
##   <chr>          <chr>          <dbl> <dbl> <date>         <dbl>  <dbl>
## 1 <NA>           Afghanistan     33.9  67.7 2020-01-22         0      0
## 2 <NA>           Afghanistan     33.9  67.7 2020-01-23         0      0
## 3 <NA>           Afghanistan     33.9  67.7 2020-01-24         0      0
## 4 <NA>           Afghanistan     33.9  67.7 2020-01-25         0      0
## 5 <NA>           Afghanistan     33.9  67.7 2020-01-26         0      0
## 6 <NA>           Afghanistan     33.9  67.7 2020-01-27         0      0
time_series_long_joined_Deaths_Confirmed <- time_series_long_joined %>%
  mutate(Deaths_over_Confirmed = Deaths / Confirmed) 
  head(time_series_long_joined_Deaths_Confirmed)
## # A tibble: 6 x 8
##   Province_State Country_Region   Lat  Long Date       Confirmed Deaths
##   <chr>          <chr>          <dbl> <dbl> <date>         <dbl>  <dbl>
## 1 <NA>           Afghanistan     33.9  67.7 2020-01-22         0      0
## 2 <NA>           Afghanistan     33.9  67.7 2020-01-23         0      0
## 3 <NA>           Afghanistan     33.9  67.7 2020-01-24         0      0
## 4 <NA>           Afghanistan     33.9  67.7 2020-01-25         0      0
## 5 <NA>           Afghanistan     33.9  67.7 2020-01-26         0      0
## 6 <NA>           Afghanistan     33.9  67.7 2020-01-27         0      0
## # … with 1 more variable: Deaths_over_Confirmed <dbl>

##Exercise 6

time_series_long_joined_Deaths_Confirmed %>% 
    ggplot(aes(x = Date,  y = Deaths_over_Confirmed)) + 
    geom_point() + 
    ggtitle("COVID-19 US Death/Confirmed Cases Per Day")
## Warning: Removed 11376 rows containing missing values (geom_point).

##Exercise 7

time_series_long_joined %>% 
    group_by(Country_Region,Date) %>% 
    summarise_at(c("Confirmed", "Deaths"), sum) %>% 
    filter (Country_Region %in% c("United Kingdom","France","Italy","Brazil", "India","Peru", "Spain", "Iran", "Mexico", "US")) %>% 
    ggplot(aes(x = Date,  y = Deaths, color = Country_Region)) + geom_point() +
    geom_line() +
    ggtitle("COVID-19 Top 10 Death Totals")

## Exercise 8

time_series_long_joined %>% 
  group_by(Country_Region,Date) %>% 
  summarise_at(c("Confirmed", "Deaths"), sum) %>% 
  filter (Country_Region %in% c("United Kingdom","France","Italy","Brazil", "India", "Peru", "Spain", "Iran", "Mexico", "US")) %>% 
    ggplot(aes(x = Date,  y = Deaths, color = Country_Region)) + 
    geom_point() +
    geom_line() +
    ggtitle("COVID-19 Top 10 Death Totals") +
    facet_wrap(~Country_Region, ncol=2, scales="free_y")

## Exercise 9

time_series_US_confirmed <- read_csv(url("https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_US.csv")) 
## Parsed with column specification:
## cols(
##   .default = col_double(),
##   iso2 = col_character(),
##   iso3 = col_character(),
##   Admin2 = col_character(),
##   Province_State = col_character(),
##   Country_Region = col_character(),
##   Combined_Key = col_character()
## )
## See spec(...) for full column specifications.
head(time_series_US_confirmed)
## # A tibble: 6 x 260
##      UID iso2  iso3  code3  FIPS Admin2 Province_State Country_Region   Lat
##    <dbl> <chr> <chr> <dbl> <dbl> <chr>  <chr>          <chr>          <dbl>
## 1 8.40e7 US    USA     840  1001 Autau… Alabama        US              32.5
## 2 8.40e7 US    USA     840  1003 Baldw… Alabama        US              30.7
## 3 8.40e7 US    USA     840  1005 Barbo… Alabama        US              31.9
## 4 8.40e7 US    USA     840  1007 Bibb   Alabama        US              33.0
## 5 8.40e7 US    USA     840  1009 Blount Alabama        US              34.0
## 6 8.40e7 US    USA     840  1011 Bullo… Alabama        US              32.1
## # … with 251 more variables: Long_ <dbl>, Combined_Key <chr>,
## #   `1/22/20` <dbl>, `1/23/20` <dbl>, `1/24/20` <dbl>, `1/25/20` <dbl>,
## #   `1/26/20` <dbl>, `1/27/20` <dbl>, `1/28/20` <dbl>, `1/29/20` <dbl>,
## #   `1/30/20` <dbl>, `1/31/20` <dbl>, `2/1/20` <dbl>, `2/2/20` <dbl>,
## #   `2/3/20` <dbl>, `2/4/20` <dbl>, `2/5/20` <dbl>, `2/6/20` <dbl>,
## #   `2/7/20` <dbl>, `2/8/20` <dbl>, `2/9/20` <dbl>, `2/10/20` <dbl>,
## #   `2/11/20` <dbl>, `2/12/20` <dbl>, `2/13/20` <dbl>, `2/14/20` <dbl>,
## #   `2/15/20` <dbl>, `2/16/20` <dbl>, `2/17/20` <dbl>, `2/18/20` <dbl>,
## #   `2/19/20` <dbl>, `2/20/20` <dbl>, `2/21/20` <dbl>, `2/22/20` <dbl>,
## #   `2/23/20` <dbl>, `2/24/20` <dbl>, `2/25/20` <dbl>, `2/26/20` <dbl>,
## #   `2/27/20` <dbl>, `2/28/20` <dbl>, `2/29/20` <dbl>, `3/1/20` <dbl>,
## #   `3/2/20` <dbl>, `3/3/20` <dbl>, `3/4/20` <dbl>, `3/5/20` <dbl>,
## #   `3/6/20` <dbl>, `3/7/20` <dbl>, `3/8/20` <dbl>, `3/9/20` <dbl>,
## #   `3/10/20` <dbl>, `3/11/20` <dbl>, `3/12/20` <dbl>, `3/13/20` <dbl>,
## #   `3/14/20` <dbl>, `3/15/20` <dbl>, `3/16/20` <dbl>, `3/17/20` <dbl>,
## #   `3/18/20` <dbl>, `3/19/20` <dbl>, `3/20/20` <dbl>, `3/21/20` <dbl>,
## #   `3/22/20` <dbl>, `3/23/20` <dbl>, `3/24/20` <dbl>, `3/25/20` <dbl>,
## #   `3/26/20` <dbl>, `3/27/20` <dbl>, `3/28/20` <dbl>, `3/29/20` <dbl>,
## #   `3/30/20` <dbl>, `3/31/20` <dbl>, `4/1/20` <dbl>, `4/2/20` <dbl>,
## #   `4/3/20` <dbl>, `4/4/20` <dbl>, `4/5/20` <dbl>, `4/6/20` <dbl>,
## #   `4/7/20` <dbl>, `4/8/20` <dbl>, `4/9/20` <dbl>, `4/10/20` <dbl>,
## #   `4/11/20` <dbl>, `4/12/20` <dbl>, `4/13/20` <dbl>, `4/14/20` <dbl>,
## #   `4/15/20` <dbl>, `4/16/20` <dbl>, `4/17/20` <dbl>, `4/18/20` <dbl>,
## #   `4/19/20` <dbl>, `4/20/20` <dbl>, `4/21/20` <dbl>, `4/22/20` <dbl>,
## #   `4/23/20` <dbl>, `4/24/20` <dbl>, `4/25/20` <dbl>, `4/26/20` <dbl>,
## #   `4/27/20` <dbl>, `4/28/20` <dbl>, …
time_series_US_confirmed_long <- time_series_US_confirmed %>% group_by(Province_State) %>% select(-c(UID,iso2, iso3, code3, FIPS, Admin2, Lat, Long_,Country_Region, Combined_Key)) %>% pivot_longer(-c(Province_State)) %>% group_by(Province_State, name) %>% summarise(value = sum(value))
## `summarise()` regrouping output by 'Province_State' (override with `.groups` argument)
time_series_US_confirmed_long %>% group_by(Province_State) %>% ggplot(aes(x = name,  y = value, color = Province_State)) + 
    geom_point() + facet_wrap(~Province_State, ncol = 5) + labs(x = "Date", y = "Confirmed", title = "COVID-19 US States and Territories Confirmed") 

##Exercise 10

time_series_long_joined %>% 
  group_by(Country_Region,Date) %>% 
  summarise_at(c("Confirmed", "Deaths"), sum) %>% 
  filter (Country_Region %in% c("China","France","Italy","Brazil", "India", "UK", "Peru", "Spain", "Iran", "Mexico", "US")) %>% 
    ggplot(aes(x = Date,  y = Deaths)) + 
    geom_point() +
    geom_line() +
    ggtitle("COVID-19 Top 10 Death Totals") +
    facet_wrap(~Country_Region, ncol=2, scales="free_y") +
  theme_dark()